- (STPS) Smoke-test possible solution: the set of tentative errors that can be avoided by using smoke tests.
import os
import pymongo
from pymongo import MongoClient
from datetime import date
# Tratamiento de datos
# ==============================================================================
import numpy as np
import pandas as pd
import string
import re
# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
# Plotly
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default='notebook'
import wordninja
# ==============================================================================
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
from nltk.corpus import words as wordsList
from nltk.corpus import stopwords
# Download resources.
nltk.download('stopwords')
nltk.download('punkt') # first-time use only
nltk.download('wordnet') # first-time use only
nltk.download('words')
import string
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import math
from collections import Counter
# Configuración warnings
# ==============================================================================
import warnings
[nltk_data] Downloading package stopwords to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package stopwords is already up-to-date! [nltk_data] Downloading package punkt to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package punkt is already up-to-date! [nltk_data] Downloading package wordnet to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package wordnet is already up-to-date! [nltk_data] Downloading package words to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package words is already up-to-date!
# Smoke Test Parameters
# ==============================================================================
# Plotly configuration: dark theme by default (light theme kept below for reference)
plotly_template="plotly_dark"
# plotly_template="plotly"
# Filters:
JobsNameBlackList = ['test'] # Drop analysis jobs whose name is in this list
logsWhiteList = ["error","fail", "warning",'err'] # Only log lines containing these words are inspected
JobsStatusWhiteList = ["failed"] # Only jobs with these statuses are inspected
similarity = 0.6 # Cosine-similarity threshold for grouping near-duplicate text
# DATA:
# mongoDbLimit=1000 # Limit of data request in mongodb
mongoDbLimit=False # Max number of documents fetched from MongoDB; False = no limit
csvRead=False # When True, load data from a CSV snapshot instead of MongoDB
csvSave=False # When True, save the fetched data to a dated CSV snapshot
csvName="data-02-logsAnalysis-" # CSV file name prefix (the date is appended on save)
csvFileRead="data-02-logsAnalysis13-08-2021.csv" # CSV snapshot used when csvRead is True
pathExperimentsFiles="/Users/ceciliocannavaciuolo/Documents/workspace/phd/experimentsGitlabColellector" # Root folder for experiment files
# Connect with DB (or load a previously saved CSV snapshot) and build `jobs`.
if not csvRead:
    # Connection settings come from the environment; NODE_ENV defaults to "dev".
    MONGODB_URL = os.environ.get('MONGODB_URL')
    NODE_ENV = os.environ.get('NODE_ENV') or "dev"
    DB_NAME = os.environ.get('APP_NAME') + "-" + NODE_ENV
    # The original created a throwaway MongoClient() and immediately replaced it;
    # a single client pointed at MONGODB_URL is enough.
    client = MongoClient(MONGODB_URL)
    db = client[DB_NAME]
    if mongoDbLimit:
        jobs = db.gitlablogs.find({}).limit(mongoDbLimit) # Read limited data
    else:
        print("@Note-01 ---- 2018711928 -----")
        jobs = db.gitlablogs.find({}) # Read all data
    jobs = pd.DataFrame(list(jobs)) # Convert to DataFrame
    print("List of data available inside of db structure")
    # Save CSV snapshot for later offline runs.
    if csvSave:
        today = date.today().strftime("%d-%m-%Y")
        jobs.to_csv(pathExperimentsFiles + '/dataAnalysis/' + csvName + today + '.csv', index=False)
else:
    print("@Note-01 ---- 1350489220 -----")
    # BUG FIX: the original built the file name from `today`, which is undefined
    # in this branch (NameError). Use the configured snapshot file name instead.
    jobs = pd.read_csv(pathExperimentsFiles + '/dataAnalysis/' + csvFileRead)
jobs.dtypes
@Note-01 ---- 2018711928 ----- List of data available inside of db structure
_id object jobId int64 projectId int64 __v int64 allow_failure bool commitId object commitMessage object commitTitle object committedEmail object created_at datetime64[ns] duration float64 jobLog object jobName object jobRef object jobStage object jobStatus object pipelineId int64 pipelineRef object pipelineStatus object pipelineUrl object pipelineWebUrl object projectDescriptions object projectName object projectNameWithNamespace object queued_duration float64 runnerDescription object runnerId object runnerIpAddress object runnerName object sha object started_at datetime64[ns] username object dtype: object
# DATA REPORT: headline counts and per-status percentages over the `jobs` frame.
print("------ DATA REPORT ------")
projectsNumber = len(jobs["projectName"].unique())
print("Number of Projects: " + str(projectsNumber))
numberOfJobs = len(jobs.index)
print("Number of pipelines Jobs (Steps): " + str(numberOfJobs))
numberOfSuccess = jobs.loc[jobs["jobStatus"] == "success"]["jobStatus"].count()
numberOfFailed = jobs.loc[jobs["jobStatus"] == "failed"]["jobStatus"].count()
numberOfCancel = jobs.loc[jobs["jobStatus"] == "canceled"]["jobStatus"].count()
totalCounted = numberOfSuccess + numberOfFailed + numberOfCancel
# Simplified from the original (1 - (total - x) / total) * 100, which is just
# x / total * 100; also guards against a zero total (empty dataset).
successPercentage = numberOfSuccess / totalCounted * 100 if totalCounted else 0.0
failedPercentage = numberOfFailed / totalCounted * 100 if totalCounted else 0.0
canceledPercentage = numberOfCancel / totalCounted * 100 if totalCounted else 0.0
print("Number of success Jobs (Steps): " + str(numberOfSuccess) + " or " + str(successPercentage) + " %")
print("Number of failed Jobs (Steps): " + str(numberOfFailed) + " or " + str(failedPercentage) + " %")
print("Number of canceled Jobs (Steps): " + str(numberOfCancel) + " or " + str(canceledPercentage) + " %")
------ DATA REPORT ------ Number of Projects: 23 Number of pipelines Jobs (Steps): 9616 Number of success Jobs (Steps): 7405 or 88.37570115765605 % Number of failed Jobs (Steps): 578 or 6.898197875641488 % Number of canceled Jobs (Steps): 396 or 4.726100966702473 %
# Pie chart of pipeline-job results by status.
# (Removed a dead `make_subplots` figure that was immediately overwritten.)
fig = px.pie(jobs, names='jobStatus', title='Pipelines Jobs results', color="jobStatus", template=plotly_template)
fig.show()
def createBarGraphByJobStatus(variable):
    """Plot a horizontal bar chart of job counts per `variable`, split by status.

    Parameters
    ----------
    variable : str
        Column of the global `jobs` DataFrame to group by
        (e.g. "projectName", "jobStage", "pipelineRef").
    """
    jobStatusUnique = jobs["jobStatus"].unique().tolist()
    df_list = []
    for status in jobStatusUnique:
        # BUG FIX: the original renamed the jobStatus column and counted it,
        # which yields the same total for every status (the count ignores the
        # column's values). Count only the rows that actually have this status.
        total = (
            jobs[jobs["jobStatus"] == status]
            .groupby(by=variable)["jobStatus"]
            .count()
            .rename(status)
        )
        df_list.append(total)
    df = pd.concat(df_list, axis=1)
    df = df.sort_values(by=[jobStatusUnique[0]], ascending=True)
    fig = px.bar(df, orientation='h', template=plotly_template, title="Number of fails by " + variable)
    fig.show()
# Render per-category breakdowns of job counts by status.
print(" Number of jobs projectName types")
createBarGraphByJobStatus("projectName")
print(" Number of jobs stage types")
createBarGraphByJobStatus("jobStage")
createBarGraphByJobStatus("pipelineRef")
Number of jobs projectName types
Number of jobs stage types
#! Create functions
def StemTokens(tokens):
    """Stem every token with the global `stemmer`.

    NOTE(review): `stemmer` is not defined anywhere in this file — confirm it
    is created elsewhere before calling this helper.
    """
    stemmed = []
    for token in tokens:
        stemmed.append(stemmer.stem(token))
    return stemmed
def StemNormalize(text):
    """Lowercase `text`, strip punctuation, tokenize, and stem the tokens."""
    cleaned = text.lower().translate(remove_punct_dict)
    return StemTokens(nltk.word_tokenize(cleaned))
def LemTokens(tokens):
    """Lemmatize every token with the module-level WordNet lemmatizer."""
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas
def LemNormalize(text):
    """Normalize `text` for vectorization: lowercase, drop punctuation,
    tokenize, and lemmatize."""
    lowered = text.lower()
    no_punct = lowered.translate(remove_punct_dict)
    return LemTokens(nltk.word_tokenize(no_punct))
def idf(n, df):
    """Smoothed inverse document frequency: log((n + 1) / (df + 1)) + 1.

    Parameters
    ----------
    n : int
        Total number of documents.
    df : int
        Number of documents containing the term.
    """
    return math.log((n + 1.0) / (df + 1.0)) + 1
# Init data: shared NLP objects used by the normalizers and vectorizer.
# (Removed a duplicated definition of `remove_punct_dict` from the original.)
remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
lemmer = nltk.stem.WordNetLemmatizer()
LemVectorizer = CountVectorizer(tokenizer=LemNormalize, stop_words='english')
# Filter text
def unionSameText(documents, similarity):
    """Group near-duplicate strings in `documents`.

    Documents whose TF-IDF cosine similarity exceeds `similarity` are merged;
    the merged text keeps only the words the group shares with the first
    document. Returns the resulting strings, de-duplicated.

    Parameters
    ----------
    documents : list[str]
    similarity : float
        Cosine-similarity threshold in [0, 1].
    """
    # (Removed an unreachable `len(documents) == 1` branch that sat inside the
    # `len(documents) > 1` case, plus the unused `countI`/`countData` locals.)
    data = []
    if len(documents) > 1:
        # Pairwise cosine-similarity matrix over TF-IDF vectors.
        LemVectorizer.fit_transform(documents)
        tf_matrix = LemVectorizer.transform(documents).toarray()
        tfidfTran = TfidfTransformer(norm="l2")
        tfidfTran.fit(tf_matrix)
        tfidf_matrix = tfidfTran.transform(tf_matrix)
        cos_similarity_matrix = (tfidf_matrix * tfidf_matrix.T).toarray()
        # Collect one representative per group of similar documents.
        for i in range(0, len(cos_similarity_matrix)):
            sameId = []
            token = documents[i]
            for i2 in range(0, len(cos_similarity_matrix)):
                if cos_similarity_matrix[i, i2] > similarity:
                    token = documents[i2]
                    sameId.append(i2)
            # Keep only the words the group shares with the first document.
            textDocument = ""
            if len(sameId) > 0:
                count = -1
                tokenOne = documents[0]
                for i3 in sameId:
                    count = count + 1
                    if count >= 1:
                        # NOTE(review): indexes by `count` (position in sameId),
                        # not by `i3` — preserved as-is; confirm intent.
                        tokenSplit = documents[count].split(sep=' ')
                        for t3 in tokenSplit:
                            if tokenOne.find(t3) != -1:
                                textDocument = textDocument + t3 + " "
            if textDocument == "":
                textDocument = token
            d = [textDocument]
            data.append(d)
    else:
        data.append(documents)
    data = list(map(list, set(map(tuple, data))))  # Delete duplicate groups
    dataList = []
    if len(data) > 0:
        if len(data[0]) > 0:
            for d in data:
                dataList.append(d[0])
    return dataList
def unique_list(l):
    """Return the elements of `l` in first-seen order, without duplicates.

    Works for unhashable elements too (membership is tested against a list).
    """
    ulist = []
    # Explicit loop instead of the original side-effecting list comprehension.
    for x in l:
        if x not in ulist:
            ulist.append(x)
    return ulist
# Demo: group near-duplicate log lines with a 0.4 similarity threshold.
textExample = ['assertion error err assertion global var', 'assertion error err assertion class foo', 'npm err test failed', 'error job failed exit code']
print("==============================")
print(" Before of apply the filters")
print(textExample)
textExample = unionSameText(textExample,0.4)
print(" After of apply the filters")
print(textExample)
============================== Before of apply the filters ['assertion error err assertion global var', 'assertion error err assertion class foo', 'npm err test failed', 'error job failed exit code'] After of apply the filters ['npm err test failed', 'assertion error err assertion ', 'error job failed exit code']
/usr/local/Caskroom/miniforge/base/lib/python3.9/site-packages/sklearn/feature_extraction/text.py:388: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ha', 'le', 'u', 'wa'] not in stop_words.
# Get fragment of text with error
# ==============================================================================
def getErrorText(texto):
    """Extract and normalize the error-related lines of a raw CI job log.

    Keeps only lines containing "err", strips URLs, dates, digits, punctuation
    and emojis, de-duplicates words and near-duplicate lines, and returns the
    cleaned fragments as a list of strings.

    Parameters
    ----------
    texto : str
        Raw job log text. Non-string input yields "" (best-effort behavior).
    """
    #! Convert all text to lowercase.
    try:
        nuevo_texto = texto.lower()
    except Exception:
        # Best-effort: anything that cannot be lowercased is treated as empty.
        return ""
    #! Web page removal (words beginning with "http")
    nuevo_texto = re.sub(r'http\S+', ' ', nuevo_texto)
    lines = nuevo_texto.split(sep='\n')
    whiteList = ["err"]
    newTextList = []
    # Hoisted out of the loop: these patterns are invariant per call.
    specialCharsRegex = '[\\!\\"\\#\\>\\<\\$\\%\\&\\\'\\(\\)\\*\\+\\,\\;\\\\]\\<\\=\\,\\>\\?\\:\\-\\|\\@\\@\\\\^_\\`\\{\\|\\}\\~]'
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               "]+", flags=re.UNICODE)
    for text in lines:
        for listI in whiteList:
            if listI in text:
                #! Remove special characters
                text = re.sub(specialCharsRegex, ' ', text)
                text = re.sub(r'http\S+', ' ', text)
                #! Remove dates (YYYY-MM-DD) and all remaining digits.
                text = re.sub(r'\d{4}-\d{2}-\d{2}', ' ', text)
                text = re.sub(' +', ' ', text)
                text = re.sub('- - t : :', '', text)
                text = re.sub(r'[0-9]', '', text)
                #! Removing emojis
                text = emoji_pattern.sub(r'', text)
                if not text in newTextList:
                    #! Normalize the surviving line.
                    text = text.strip()
                    text = text.replace('=', ' ')  # Delete =
                    text = text.replace('_', ' ')  # Delete _
                    text = text.replace(':', '')   # Delete :
                    text = text.replace(';', '')   # Delete ;
                    # NOTE(review): the next replace is a no-op as written; it
                    # probably targeted a curly quote originally — confirm.
                    text = text.replace("'", "'")
                    text = text.replace("'", "")   # Delete '
                    text = text.replace("!", "")   # Delete !
                    text = text.replace('[', ' ').replace(']', '')  # Delete [ ]
                    text = re.sub(' +', ' ', text)  # Collapse multiple spaces
                    # BUG FIX: only cut at "." when one is present. The original
                    # sliced with find() == -1 and silently dropped the last
                    # character of dot-free lines ("exit cod" instead of
                    # "exit code").
                    dot = text.find(".")
                    if dot != -1:
                        text = text[0:dot]  # Delete text after .
                    text = ' '.join(unique_list(text.split()))  # Delete duplicate words
                    # Re-split concatenated words, then drop duplicates again.
                    text = wordninja.split(text)
                    text = [item for pos, item in enumerate(text) if text.index(item) == pos]
                    text = ' '.join(text)
                    newTextList.append(text)
    # Drop blank/short entries and duplicates. (Loop variables renamed from
    # `string`, which shadowed the imported `string` module; one of two
    # identical de-duplication passes removed.)
    newTextList = [s for s in newTextList if s.strip()]
    newTextList = [s for s in newTextList if s != " "]
    newTextList = [s for s in newTextList if s != ""]
    newTextList = [s for s in newTextList if len(s) > 2]
    unics = set()
    newTextList = [s for s in newTextList if s not in unics and (unics.add(s) or True)]
    # Merge near-duplicate lines.
    newTextList = unionSameText(newTextList, 0.4)
    textList = []
    for text in newTextList:
        textList.append(' '.join(unique_list(text.split())))
    return textList
textExample = '''
\u001b[0KRunning with gitlab-runner 13.11.0 (7f7a4bb0)\n\u001b[0;m\u001b[0K on pax-italia-pot HRhNkEr3\n\u001b[0;msection_start:1628150616:prepare_executor\r\u001b[0K\u001b[0K\u001b[36;1mPreparing the \"shell\" executor\u001b[0;m\n\u001b[0;m\u001b[0KUsing Shell executor...\n\u001b[0;msection_end:1628150616:prepare_executor\r\u001b[0Ksection_start:1628150616:prepare_script\r\u001b[0K\u001b[0K\u001b[36;1mPreparing environment\u001b[0;m\n\u001b[0;mRunning on TEST-POT...\r\nsection_end:1628150616:prepare_script\r\u001b[0Ksection_start:1628150616:get_sources\r\u001b[0K\u001b[0K\u001b[36;1mGetting source from Git repository\u001b[0;m\n\u001b[0;m\u001b[32;1mFetching changes...\u001b[0;m\r\nReinitialized existing Git repository in C:/GitLab-Runner/builds/HRhNkEr3/0/tech-gap-italia/pax-italia-pot/pax-italia-pot-api/.git/\n\u001b[32;1mChecking out fec6b887 as deployTest...\u001b[0;m\r\nRemoving .m2/\nRemoving db/\nRemoving potStatusCode.txt\nRemoving target/\ngit-lfs/2.13.3 (GitHub; windows amd64; go 1.16.2; git a5e65851)\n\r\n\u001b[32;1mSkipping Git submodules setup\u001b[0;m\r\nsection_end:1628150622:get_sources\r\u001b[0Ksection_start:1628150622:restore_cache\r\u001b[0K\u001b[0K\u001b[36;1mRestoring cache\u001b[0;m\n\u001b[0;mVersion: 13.11.0\nGit revision: 7f7a4bb0\nGit branch: 13-11-stable\nGO version: go1.13.8\nBuilt: 2021-04-20T17:02:32+0000\nOS/Arch: windows/amd64\n\u001b[32;1mChecking cache for %CI_COMMIT_REF_SLUG%-1...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=3224 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\nNo URL provided, cache will not be downloaded from shared cache server. 
Instead a local version of cache will be extracted.\u001b[0;m \n\u001b[32;1mSuccessfully extracted cache\u001b[0;m\r\nsection_end:1628150623:restore_cache\r\u001b[0Ksection_start:1628150623:download_artifacts\r\u001b[0K\u001b[0K\u001b[36;1mDownloading artifacts\u001b[0;m\n\u001b[0;mVersion: 13.11.0\nGit revision: 7f7a4bb0\nGit branch: 13-11-stable\nGO version: go1.13.8\nBuilt: 2021-04-20T17:02:32+0000\nOS/Arch: windows/amd64\n\u001b[32;1mDownloading artifacts for updateDataBase (1478970780)...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=2688 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n\u001b[31;1mERROR: Downloading artifacts from coordinator... error couldn't execute GET against https://gitlab.com/api/v4/jobs/1478970780/artifacts?direct_download=true: Get https://storage.googleapis.com/gitlab-gprd-artifacts/a5/35/a5358b9b40fe2e633b7d371c0dc4767d051c7fc2cbcc5d15e596c9c1bcb4d0ac/2021_08_05/1478970780/1591261703/artifacts.zip?GoogleAccessId=gitlab-object-storage-prd@gitlab-production.iam.gserviceaccount.com&Signature=eO9MxI2VRMDf1Po5llLAjH6nRO%2FbfZmI%2Fpt9%2F8Lvtnl8KW6BsSIdHH%2B4QJmy%0Awb8qlZxqkxl9VKyb1x9C%2FPJrDoH9Pz%2FwqmuVBQjWCiUbl2mWdHKrBiJr1A9y%0A%2BlNkIOp8Cdn1XWe9m50qCh1gBO5I5CRgzslCzMYZn4QcoZtC%2FQuPEzHm2n0x%0AemV8kGdcq3Z2JIom8oqU91wkdxx1a4IvvXCmgIAdPZ26OInj%2BhKA4cVeCp%2FO%0AKbPqLVBz%2BZgnuj5klSJdPei2I3vb%2F9D6v02K3mgqPwim0u9vjLCUHDZZVUU1%0AZbJ7Ms63VIuYY8OWrwmok2ZtZiFj%2BVoUeLav1xECKg%3D%3D&Expires=1628151224: x509: certificate signed by unknown authority\u001b[0;m \u001b[31;1mid\u001b[0;m=1478970780 \u001b[31;1mtoken\u001b[0;m=gZyEXDvG\n\u001b[0;33mWARNING: Retrying... \u001b[0;m \u001b[0;33merror\u001b[0;m=invalid argument\nDownloading artifacts from coordinator... 
ok \u001b[0;m id\u001b[0;m=1478970780 responseStatus\u001b[0;m=200 OK token\u001b[0;m=gZyEXDvG\n\u001b[32;1mDownloading artifacts for compilePackage (1478970783)...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=5644 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n\u001b[31;1mERROR: Downloading artifacts from coordinator... error couldn't execute GET against https://gitlab.com/api/v4/jobs/1478970783/artifacts?direct_download=true: Get https://storage.googleapis.com/gitlab-gprd-artifacts/a5/35/a5358b9b40fe2e633b7d371c0dc4767d051c7fc2cbcc5d15e596c9c1bcb4d0ac/2021_08_05/1478970783/1591271101/artifacts.zip?GoogleAccessId=gitlab-object-storage-prd@gitlab-production.iam.gserviceaccount.com&Signature=1ejAvX%2FpdR%2FbDY4W9t7gNwTj%2F%2FoQKBzwaYvKO7pLWFka30UwPLOJJ7fi4vCe%0A1CZojYoZdQ0Yrb8IMhTG9jg6WTf3b5zsWPbwlwMdU%2BZn4hRP01NjY9GRPeqx%0A4S9A2T85qUjL%2F%2BOfXrorqP%2FiFB%2BgI%2BO0pemERPNtqI27ms%2BS4VJATLzpkpIl%0AMRhZPMd0YYP9StKmoWxPEwNtXjcKh%2FevoourBU2Orn%2BXIbgqlHpZtntZagpv%0ARt8KUTohOwqjah9N53J%2B%2BRBtqmmIN6z2tyglu1kqqVBFdPMRntt8PXjXNrIK%0AZIRt9C1wR%2FKzdJD4ZEcWkmIz8LELSYuFJXYUUTGGzA%3D%3D&Expires=1628151226: x509: certificate signed by unknown authority\u001b[0;m \u001b[31;1mid\u001b[0;m=1478970783 \u001b[31;1mtoken\u001b[0;m=SEszxyKc\n\u001b[0;33mWARNING: Retrying... \u001b[0;m \u001b[0;33merror\u001b[0;m=invalid argument\nDownloading artifacts from coordinator... ok \u001b[0;m id\u001b[0;m=1478970783 responseStatus\u001b[0;m=200 OK token\u001b[0;m=SEszxyKc\n\u001b[32;1mDownloading artifacts for smokeTest (1478970788)...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=2608 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n\u001b[31;1mERROR: Downloading artifacts from coordinator... 
error couldn't execute GET against https://gitlab.com/api/v4/jobs/1478970788/artifacts?direct_download=true: Get https://storage.googleapis.com/gitlab-gprd-artifacts/a5/35/a5358b9b40fe2e633b7d371c0dc4767d051c7fc2cbcc5d15e596c9c1bcb4d0ac/2021_08_05/1478970788/1591285532/artifacts.zip?GoogleAccessId=gitlab-object-storage-prd@gitlab-production.iam.gserviceaccount.com&Signature=HavL1z7T%2BJwTsaIW0zHoB9aUI4vkawvyoDunTn85PukDEltxhxfi4ZHJ2130%0At7R1PDjZE18RfAe%2B80zCY6nY66%2B%2Be8%2F096sHPaqf5vkhBE16ml%2FQH0ALLn7F%0ARmDQoI83rcOTEE5%2BK35RDh8I9gV%2BP62H90zSTlCfxWx4OUqHfssA4OI26B10%0Am9bDXfXORZys78YNFMGMAidb7PaMc7HVDf%2FhhRwo%2BqpaROCttOxyYVi%2Bgsgj%0AXwwK%2BUdAtf7gHi%2BU9CpBXV2wOjjk6wqow62vxsz0%2F5ioBedAkpTX5CFiFCfO%0AgmuptgwEYSeLu%2BOPnXKNFH1kYoIf%2BoqdXrlOmCkxlA%3D%3D&Expires=1628151247: x509: certificate signed by unknown authority\u001b[0;m \u001b[31;1mid\u001b[0;m=1478970788 \u001b[31;1mtoken\u001b[0;m=mzxPxF9Y\n\u001b[0;33mWARNING: Retrying... \u001b[0;m \u001b[0;33merror\u001b[0;m=invalid argument\nDownloading artifacts from coordinator... 
ok \u001b[0;m id\u001b[0;m=1478970788 responseStatus\u001b[0;m=200 OK token\u001b[0;m=mzxPxF9Y\nsection_end:1628150649:download_artifacts\r\u001b[0Ksection_start:1628150649:step_script\r\u001b[0K\u001b[0K\u001b[36;1mExecuting \"step_script\" stage of the job script\u001b[0;m\n\u001b[0;m\u001b[32;1m$ echo \"CREATE PRODUCTION ACTIFACTS\"\u001b[0;m\r\nCREATE PRODUCTION ACTIFACTS\r\nsection_end:1628150650:step_script\r\u001b[0Ksection_start:1628150650:archive_cache\r\u001b[0K\u001b[0K\u001b[36;1mSaving cache for successful job\u001b[0;m\n\u001b[0;mVersion: 13.11.0\nGit revision: 7f7a4bb0\nGit branch: 13-11-stable\nGO version: go1.13.8\nBuilt: 2021-04-20T17:02:32+0000\nOS/Arch: windows/amd64\n\u001b[32;1mCreating cache %CI_COMMIT_REF_SLUG%-1...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=1832 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n\u001b[0;33mWARNING: binaries/: no matching files \u001b[0;m \nArchive is up to date! \u001b[0;m \n\u001b[32;1mCreated cache\u001b[0;m\r\nsection_end:1628150650:archive_cache\r\u001b[0Ksection_start:1628150650:upload_artifacts_on_success\r\u001b[0K\u001b[0K\u001b[36;1mUploading artifacts for successful job\u001b[0;m\n\u001b[0;mVersion: 13.11.0\nGit revision: 7f7a4bb0\nGit branch: 13-11-stable\nGO version: go1.13.8\nBuilt: 2021-04-20T17:02:32+0000\nOS/Arch: windows/amd64\n\u001b[32;1mUploading artifacts...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=5780 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n./db: found 20 matching files and directories \u001b[0;m \n./target/tool-repair-api-0.0.1-SNAPSHOT.jar: found 1 matching files and directories\u001b[0;m \n./flyway.production.properties: found 1 matching files and directories\u001b[0;m \n./InstructionsDeploy.txt: found 1 matching files and directories\u001b[0;m \n./pom.xml: found 1 matching files and directories \u001b[0;m \nUploading artifacts as \"archive\" to 
coordinator... ok\u001b[0;m id\u001b[0;m=1478970795 responseStatus\u001b[0;m=201 Created token\u001b[0;m=YqeuAFFa\nsection_end:1628150666:upload_artifacts_on_success\r\u001b[0Ksection_start:1628150666:cleanup_file_variables\r\u001b[0K\u001b[0K\u001b[36;1mCleaning up file based variables\u001b[0;m\n\u001b[0;msection_end:1628150667:cleanup_file_variables\r\u001b[0K\u001b[32;1mJob succeeded\n\u001b[0;m
'''
# Extract error fragments from the sample GitLab job log above.
errorText = getErrorText(textExample)
print(errorText)
['m error downloading artifacts from coordinator', 'm warning retrying']
from spellchecker import SpellChecker
spell = SpellChecker()
def check(word):
    """Return True when `word` equals the global SpellChecker's best
    correction for it, i.e. the word is considered correctly spelled."""
    # Simplified from an if/else that returned True/False explicitly.
    return word == spell.correction(word)
check("hello")
def removeWordIfNotExist(textList):
    """Filter each text in `textList` down to the words accepted by `check`,
    returning the filtered texts in the same order."""
    filtered = []
    for entry in textList:
        kept = [word for word in entry.split(sep=' ') if check(word)]
        filtered.append(' '.join(kept))
    return filtered
# Demo: drop tokens the spell checker does not recognize.
textExample = ['m error downloading artifacts from coordinator', 'm warning retrying']
text = removeWordIfNotExist(textExample)
print(text)
['error downloading artifacts from coordinator', 'warning retrying']
textExample = '''
\u001b[0KRunning with gitlab-runner 13.11.0 (7f7a4bb0)\n\u001b[0;m\u001b[0K on pax-italia-pot HRhNkEr3\n\u001b[0;msection_start:1628150616:prepare_executor\r\u001b[0K\u001b[0K\u001b[36;1mPreparing the \"shell\" executor\u001b[0;m\n\u001b[0;m\u001b[0KUsing Shell executor...\n\u001b[0;msection_end:1628150616:prepare_executor\r\u001b[0Ksection_start:1628150616:prepare_script\r\u001b[0K\u001b[0K\u001b[36;1mPreparing environment\u001b[0;m\n\u001b[0;mRunning on TEST-POT...\r\nsection_end:1628150616:prepare_script\r\u001b[0Ksection_start:1628150616:get_sources\r\u001b[0K\u001b[0K\u001b[36;1mGetting source from Git repository\u001b[0;m\n\u001b[0;m\u001b[32;1mFetching changes...\u001b[0;m\r\nReinitialized existing Git repository in C:/GitLab-Runner/builds/HRhNkEr3/0/tech-gap-italia/pax-italia-pot/pax-italia-pot-api/.git/\n\u001b[32;1mChecking out fec6b887 as deployTest...\u001b[0;m\r\nRemoving .m2/\nRemoving db/\nRemoving potStatusCode.txt\nRemoving target/\ngit-lfs/2.13.3 (GitHub; windows amd64; go 1.16.2; git a5e65851)\n\r\n\u001b[32;1mSkipping Git submodules setup\u001b[0;m\r\nsection_end:1628150622:get_sources\r\u001b[0Ksection_start:1628150622:restore_cache\r\u001b[0K\u001b[0K\u001b[36;1mRestoring cache\u001b[0;m\n\u001b[0;mVersion: 13.11.0\nGit revision: 7f7a4bb0\nGit branch: 13-11-stable\nGO version: go1.13.8\nBuilt: 2021-04-20T17:02:32+0000\nOS/Arch: windows/amd64\n\u001b[32;1mChecking cache for %CI_COMMIT_REF_SLUG%-1...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=3224 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\nNo URL provided, cache will not be downloaded from shared cache server. 
Instead a local version of cache will be extracted.\u001b[0;m \n\u001b[32;1mSuccessfully extracted cache\u001b[0;m\r\nsection_end:1628150623:restore_cache\r\u001b[0Ksection_start:1628150623:download_artifacts\r\u001b[0K\u001b[0K\u001b[36;1mDownloading artifacts\u001b[0;m\n\u001b[0;mVersion: 13.11.0\nGit revision: 7f7a4bb0\nGit branch: 13-11-stable\nGO version: go1.13.8\nBuilt: 2021-04-20T17:02:32+0000\nOS/Arch: windows/amd64\n\u001b[32;1mDownloading artifacts for updateDataBase (1478970780)...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=2688 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n\u001b[31;1mERROR: Downloading artifacts from coordinator... error couldn't execute GET against https://gitlab.com/api/v4/jobs/1478970780/artifacts?direct_download=true: Get https://storage.googleapis.com/gitlab-gprd-artifacts/a5/35/a5358b9b40fe2e633b7d371c0dc4767d051c7fc2cbcc5d15e596c9c1bcb4d0ac/2021_08_05/1478970780/1591261703/artifacts.zip?GoogleAccessId=gitlab-object-storage-prd@gitlab-production.iam.gserviceaccount.com&Signature=eO9MxI2VRMDf1Po5llLAjH6nRO%2FbfZmI%2Fpt9%2F8Lvtnl8KW6BsSIdHH%2B4QJmy%0Awb8qlZxqkxl9VKyb1x9C%2FPJrDoH9Pz%2FwqmuVBQjWCiUbl2mWdHKrBiJr1A9y%0A%2BlNkIOp8Cdn1XWe9m50qCh1gBO5I5CRgzslCzMYZn4QcoZtC%2FQuPEzHm2n0x%0AemV8kGdcq3Z2JIom8oqU91wkdxx1a4IvvXCmgIAdPZ26OInj%2BhKA4cVeCp%2FO%0AKbPqLVBz%2BZgnuj5klSJdPei2I3vb%2F9D6v02K3mgqPwim0u9vjLCUHDZZVUU1%0AZbJ7Ms63VIuYY8OWrwmok2ZtZiFj%2BVoUeLav1xECKg%3D%3D&Expires=1628151224: x509: certificate signed by unknown authority\u001b[0;m \u001b[31;1mid\u001b[0;m=1478970780 \u001b[31;1mtoken\u001b[0;m=gZyEXDvG\n\u001b[0;33mWARNING: Retrying... \u001b[0;m \u001b[0;33merror\u001b[0;m=invalid argument\nDownloading artifacts from coordinator... 
ok \u001b[0;m id\u001b[0;m=1478970780 responseStatus\u001b[0;m=200 OK token\u001b[0;m=gZyEXDvG\n\u001b[32;1mDownloading artifacts for compilePackage (1478970783)...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=5644 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n\u001b[31;1mERROR: Downloading artifacts from coordinator... error couldn't execute GET against https://gitlab.com/api/v4/jobs/1478970783/artifacts?direct_download=true: Get https://storage.googleapis.com/gitlab-gprd-artifacts/a5/35/a5358b9b40fe2e633b7d371c0dc4767d051c7fc2cbcc5d15e596c9c1bcb4d0ac/2021_08_05/1478970783/1591271101/artifacts.zip?GoogleAccessId=gitlab-object-storage-prd@gitlab-production.iam.gserviceaccount.com&Signature=1ejAvX%2FpdR%2FbDY4W9t7gNwTj%2F%2FoQKBzwaYvKO7pLWFka30UwPLOJJ7fi4vCe%0A1CZojYoZdQ0Yrb8IMhTG9jg6WTf3b5zsWPbwlwMdU%2BZn4hRP01NjY9GRPeqx%0A4S9A2T85qUjL%2F%2BOfXrorqP%2FiFB%2BgI%2BO0pemERPNtqI27ms%2BS4VJATLzpkpIl%0AMRhZPMd0YYP9StKmoWxPEwNtXjcKh%2FevoourBU2Orn%2BXIbgqlHpZtntZagpv%0ARt8KUTohOwqjah9N53J%2B%2BRBtqmmIN6z2tyglu1kqqVBFdPMRntt8PXjXNrIK%0AZIRt9C1wR%2FKzdJD4ZEcWkmIz8LELSYuFJXYUUTGGzA%3D%3D&Expires=1628151226: x509: certificate signed by unknown authority\u001b[0;m \u001b[31;1mid\u001b[0;m=1478970783 \u001b[31;1mtoken\u001b[0;m=SEszxyKc\n\u001b[0;33mWARNING: Retrying... \u001b[0;m \u001b[0;33merror\u001b[0;m=invalid argument\nDownloading artifacts from coordinator... ok \u001b[0;m id\u001b[0;m=1478970783 responseStatus\u001b[0;m=200 OK token\u001b[0;m=SEszxyKc\n\u001b[32;1mDownloading artifacts for smokeTest (1478970788)...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=2608 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n\u001b[31;1mERROR: Downloading artifacts from coordinator... 
error couldn't execute GET against https://gitlab.com/api/v4/jobs/1478970788/artifacts?direct_download=true: Get https://storage.googleapis.com/gitlab-gprd-artifacts/a5/35/a5358b9b40fe2e633b7d371c0dc4767d051c7fc2cbcc5d15e596c9c1bcb4d0ac/2021_08_05/1478970788/1591285532/artifacts.zip?GoogleAccessId=gitlab-object-storage-prd@gitlab-production.iam.gserviceaccount.com&Signature=HavL1z7T%2BJwTsaIW0zHoB9aUI4vkawvyoDunTn85PukDEltxhxfi4ZHJ2130%0At7R1PDjZE18RfAe%2B80zCY6nY66%2B%2Be8%2F096sHPaqf5vkhBE16ml%2FQH0ALLn7F%0ARmDQoI83rcOTEE5%2BK35RDh8I9gV%2BP62H90zSTlCfxWx4OUqHfssA4OI26B10%0Am9bDXfXORZys78YNFMGMAidb7PaMc7HVDf%2FhhRwo%2BqpaROCttOxyYVi%2Bgsgj%0AXwwK%2BUdAtf7gHi%2BU9CpBXV2wOjjk6wqow62vxsz0%2F5ioBedAkpTX5CFiFCfO%0AgmuptgwEYSeLu%2BOPnXKNFH1kYoIf%2BoqdXrlOmCkxlA%3D%3D&Expires=1628151247: x509: certificate signed by unknown authority\u001b[0;m \u001b[31;1mid\u001b[0;m=1478970788 \u001b[31;1mtoken\u001b[0;m=mzxPxF9Y\n\u001b[0;33mWARNING: Retrying... \u001b[0;m \u001b[0;33merror\u001b[0;m=invalid argument\nDownloading artifacts from coordinator... 
ok \u001b[0;m id\u001b[0;m=1478970788 responseStatus\u001b[0;m=200 OK token\u001b[0;m=mzxPxF9Y\nsection_end:1628150649:download_artifacts\r\u001b[0Ksection_start:1628150649:step_script\r\u001b[0K\u001b[0K\u001b[36;1mExecuting \"step_script\" stage of the job script\u001b[0;m\n\u001b[0;m\u001b[32;1m$ echo \"CREATE PRODUCTION ACTIFACTS\"\u001b[0;m\r\nCREATE PRODUCTION ACTIFACTS\r\nsection_end:1628150650:step_script\r\u001b[0Ksection_start:1628150650:archive_cache\r\u001b[0K\u001b[0K\u001b[36;1mSaving cache for successful job\u001b[0;m\n\u001b[0;mVersion: 13.11.0\nGit revision: 7f7a4bb0\nGit branch: 13-11-stable\nGO version: go1.13.8\nBuilt: 2021-04-20T17:02:32+0000\nOS/Arch: windows/amd64\n\u001b[32;1mCreating cache %CI_COMMIT_REF_SLUG%-1...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=1832 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n\u001b[0;33mWARNING: binaries/: no matching files \u001b[0;m \nArchive is up to date! \u001b[0;m \n\u001b[32;1mCreated cache\u001b[0;m\r\nsection_end:1628150650:archive_cache\r\u001b[0Ksection_start:1628150650:upload_artifacts_on_success\r\u001b[0K\u001b[0K\u001b[36;1mUploading artifacts for successful job\u001b[0;m\n\u001b[0;mVersion: 13.11.0\nGit revision: 7f7a4bb0\nGit branch: 13-11-stable\nGO version: go1.13.8\nBuilt: 2021-04-20T17:02:32+0000\nOS/Arch: windows/amd64\n\u001b[32;1mUploading artifacts...\u001b[0;m\r\nRuntime platform \u001b[0;m arch\u001b[0;m=amd64 os\u001b[0;m=windows pid\u001b[0;m=5780 revision\u001b[0;m=7f7a4bb0 version\u001b[0;m=13.11.0\n./db: found 20 matching files and directories \u001b[0;m \n./target/tool-repair-api-0.0.1-SNAPSHOT.jar: found 1 matching files and directories\u001b[0;m \n./flyway.production.properties: found 1 matching files and directories\u001b[0;m \n./InstructionsDeploy.txt: found 1 matching files and directories\u001b[0;m \n./pom.xml: found 1 matching files and directories \u001b[0;m \nUploading artifacts as \"archive\" to 
coordinator... ok\u001b[0;m id\u001b[0;m=1478970795 responseStatus\u001b[0;m=201 Created token\u001b[0;m=YqeuAFFa\nsection_end:1628150666:upload_artifacts_on_success\r\u001b[0Ksection_start:1628150666:cleanup_file_variables\r\u001b[0K\u001b[0K\u001b[36;1mCleaning up file based variables\u001b[0;m\n\u001b[0;msection_end:1628150667:cleanup_file_variables\r\u001b[0K\u001b[32;1mJob succeeded\n\u001b[0;m
'''
# Combine both filters: extract error fragments, then drop unknown words.
errorText = getErrorText(textExample)
errorText = removeWordIfNotExist(errorText)
print(errorText)
['error from', 'warning']
textExample = '''
✓ Statements \n ✓ Exceptions \n ✓ Static \n ✓ Class (55ms) \n ✓ Class props (47ms) \n 3) Types \n 19 passing (1s) \n 3 failing \n 1) AST translation \n Reserved keyword: \n 2) AST translation
Templates:
AssertionError [ERR_ASSERTION]: 'global[var];' == 'global[var_r];'
+ expected - actual
at translates (test/helper.js:24:16)
at Context.<anonymous> (test/translation.js:91:9)
at processImmediate (internal/timers.js:456:21)
3) AST translation
Types:
AssertionError [ERR_ASSERTION]: 'class Foo {\n' +
' annotated(untyped, class: Cls.Name, self: Foo, array: {} | any[], callable: Function, bool: boolean, float: number, int: number, string: string, iter: {} | any[]) {}\n' +
'\n' +
'};' == 'class Foo {\n' +
' annotated(untyped, class_r: Cls.Name, self: Foo, array: {} | any[], callable: Function, bool: boolean, float: number, int: number, string: string, iter: {} | any[]) {}};'
+ expected - actual
class Foo {
- annotated(untyped, class: Cls.Name, self: Foo, array: {} | any[], callable: Function, bool: boolean, float: number, int: number, string: string, iter: {} | any[]) {}
-
-};
+ annotated(untyped, class_r: Cls.Name, self: Foo, array: {} | any[], callable: Function, bool: boolean, float: number, int: number, string: string, iter: {} | any[]) {}};
at translates (test/helper.js:24:16)
at Context.<anonymous> (test/translation.js:198:9)
at processImmediate (internal/timers.js:456:21)
npm ERR! Test failed. See above for more details.
ERROR: Job failed: exit code 1
'''
# Tokenize the failing mocha/npm job log above; the expected token list is
# shown in the comment under this cell's output.
errorText = getErrorText(textExample)
print(errorText)
# ['error job failed exit code', 'assertion error err class foo', 'n pm err test failed', 'assertion error err global var']
['error job failed exit cod', 'assertion error err', 'n pm err test failed']
textExample = '''
* [new branch] frontend-test -> origin/frontend-test
* [new branch] functional-testing-junit -> origin/functional-testing-junit
* [new branch] master -> origin/master
* [new branch] sol-dev -> origin/sol-dev
* [new branch] sql-fix-branch -> origin/sql-fix-branch
* [new branch] testing-report -> origin/testing-report
* [new tag] CR-H1-2021-deploy-intermedio -> CR-H1-2021-deploy-intermedio
* [new tag] v1dffdfdfdf0_20190405 -df dff1dff0df0_20190405
* [new tag] v1dffdfdfdf1_20190424 -df dff1dff0df1_20190424
Checking out 18b927c2 as devdfdfdffdffdf
Skipping Git submodules setup
Downloading artifacts for build-jar (1473723797)dfdfdffdffdf
Downloading artifacts from coordinatordfdfdffdffdf ok id=1473723797 responseStatus=200 OK token=G4kf2uft
$ echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/confdfdfdfjson
$ /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/Dockerfile --destination $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
INFO[0001] Retrieving image manifest openjdk:11.0
INFO[0001] Retrieving image openjdk:11.0 from registry inddfdfdfdodfkdfrdfio
error building image: GET https://index.dockdfdfdfio/v2/library/openjdk/manifestdf/df1df0: TOOMANYREQUESTS: You have reached your pull ratedflidfitdf You may increase the limit by authenticating and upgrading: httdfs:/dfwdfwdfddfckerdfcom/increase-rate-limit
ERROR: Job failed: command terminated with exit code 1
'''
errorText = getErrorText(textExample)
print(errorText)
# The log above contains real failures (rate-limit error, job failed), so
# the extracted token list must be non-empty.
assert len(errorText) > 0
print("✅ PASS TEST: OK 👍")
textExample = '''
>>>> 03_add_account_toEntity-06 : |'\n │ '|------------------------------------------------|'\n │ \n │ 'https://edutelling-api-develop.openshidfdfdftecdfgdfpdfit/a\n │ pi/v1/tutors/create/tutor-from-ambassador?ambassadorId\n │ ='\n │ 'RESPONSE'\n │ \n │ `{\"success\":true,\"message\":\"Tutor '#17:-2' cdfeadfeddf\",\"\n │ data\":{\"tutorId\":\"#17:-2\"}}`\n │ '|-**********************************************-|'\n │ \n └\n\n→ 04_login_no2_v3\n POST https://edutelling-apidfdevdflopddfopendfhdfftdftdfchgapdfit/api/v1/auth/authentication [200 OK, 798B, 137ms]\n ✓ [(POST)/api/v1/auth/authentication] Login Delete (200)\n\n→ 05_login_switch_how_account\n POST https://edutellidfg-api-dfedfelopdfodffenshiftdfftechgapdfit/api/v1/auth/complete-authedftication df200 OK, 1df01KB, 27ms]\n ✓ CHECK IF EXIST JWT\n ✓ CHECK IF EXIST jwtRefresh\n ✓ [(POST)/api/v1/auth/authentication] Login Delete (200)\n\n→ 01_entityTypeAccountService_getAll\n GET https://dfdutellingdfdfpi-dedfelodfdfopensdfiftdftechgapdfit/apdf/v1/accounts/dfmbassador@botdfcom/all [200 OK, 680B, 23ms]\n ┌\n │ 'deleteAccountId :', '#12:1564'\n │ 'deleteAccountId :', '12%3A1564'\n └\n\n→ 02_reamoveAccount\n DELETE dfttps://eddftelldfndf-api-dedfelopdfodfenshiftdftechgapdfit/api/v1/accounts/remove?dataId=12%3A1564 [200 OK, 397B, 15ms]\n\n→ goTo_workflowControl\n OPdfIONS httpdf://edutdfflling-apidfdevelopdffopenshiftdftechgapdfit [404 Not Found, 255B, 4ms]\n\nAttempting to set next request to controlOfWorkFlowTempalte_06\n\n→ controlOfWorkFlowTempalte_06dfn OPTIONdf https:df/edfutelling-dfpi-devedfopdfopenshiftdftechgapdfit [404 Not Found, 255B, 6ms]\n ┌\n │ ' ------ ------ ---------- ---------'\n │ \n │ '{\"nameOfConfigFile\":\"config_multiAccount_tutorHowAmba\n │ ssador\",\"templateRun\":[\"01_entityTypeAccountService_ge\n │ tAll_06\",\"config_multiAccount_studentHowAmbassador\"],\"\n │ executed\":[false,true]}'\n │ \n │ 'WORKFLOW GO TO: ------------------------'\n │ \n │ 'WORKFLOW CONTROL REDIRECTING TEMPATE TO: ', 
'config_m\n │ ultiAccount_studentHowAmbassador'\n │ \n │ \n └\n\nAttempting to set next request to config_multiAccount_studentHowAmbassador\n\n→ config_multiAccount_studentHowAmbadfsador\n dfPTIONS dfttps:df/edutellidfg-api-ddfvelopdfopenshiftdftechgapdfit [404 Not Found, 255B, 5ms]\n\nAttempting to set next request to controlOfWorkFlowTempalte_06\n\n→ controlOfWorkFlodfTempalte_df6\n OPdfIONS httdfs://edutedfling-apdf-developdfopenshiftdftechgapdfit [404 Not Found, 255B, 8ms]\n ┌\n │ ' ------ ------ ---------- ---------'\n │ \n │ '{\"nameOfConfigFile\":\"config_multiAccount_tutorHowAmba\n │ ssador\",\"templateRun\":[\"01_entityTypeAccountService_ge\n │ tAll_06\",\"stop_06_createCourseModuleAndStage\"]}'\n │ \n │ 'WORKFLOW GO TO: ------------------------'\n │ \n │ 'WORKFLOW CONTROL REDIRECTING TEMPATE TO: ', '01_entit\n │ yTypeAccountService_getAll_06'\n │ \n │ \n └\n\nAttempting to set next request to 01_entityTypeAccountService_getAll_06\n\n→ 01_entityTypeAcdfountServidfe_getAldf_06\n GET dfttps://eddftellindfdfapi-developdfopenshiftdftechgadfdfit/api/v1/accounts/student@botdfcom/all [200 OK, 475B, 8ms]\n ┌\n │ 'entityTypeUppperCase: ', 'Student'\n │ '20%3A273'\n df\n\n→ 02_dfeamoveAdfcount_06\n DELdfTE https:df/eduteldfing-api-developdfopenshiftdftechgapdfit/api/v1/accounts/remove?dataId=12%3A1564 [200 OK, 379B, 53dfs]\n\n→ 0df_add_acdfount_toEntity-06\ndf POST httdfs://edudfelling-api-developdfopenshiftdftechgapdfit/api/v1/ambassadors/create/ambassador-from-student?studentId=20%3A273 [200 OK, 420B, 84ms]\n ✓ [(POST) /api/v1/teachers/create/teacher-from-student?studentId ] Status code is 200\n ✓ Check if was Success to add access student how teacher\n ┌\n │ \n │ '|------------------------------------------------|'\n │ '| >>>> 03_add_account_toEntity-06 : |'\n │ '|-------------df---------df-------df----------------|'\n df│ \n │ 'dfttps://dfdutelling-api-developdfopenshiftdftechgapdfit/a\n │ pi/v1/ambassadors/create/ambassador-from-student?stude\n │ ntId='\n │ 
'RdfSPONSE'\n │ \n │ `{\"success\":true,\"medfsage\":\"Ambassador '#19:-2' create\n │ ddf\",\"data\":{\"ambassadorId\":\"#19:-2\"}}`\n │ '|-********************************df*********df***-|'\df │ \n └\n\n→ 04_login_ndf2_v3\n PdfST httpdf://edutelling-api-developdfopenshiftdftechgapdfit/api/v1/auth/authentication [200 OK, 790B, 152ms]\n ✓ [(POST)/api/v1/auth/authenticatidfn] Login dfelete (df00)\n\n→ 05_login_switch_howdfaccount\ndf POST hddftps://edutelling-api-developdfopenshiftdftechgapddfit/api/v1/auth/complete-authentication [200 OK, 1df03KB, 23ms]\n ✓ CHECK IF EXIST JWT\n ✓ CHECK IF EXIST jwtRefresh\n ✓ [(POST)/api/v1/auth/authenticadfion] Logidf Deletedf(200)\n\n→ 01_entityTypeAccoundfSdfrvice_getdfll\n GdfT https://edutelling-api-develdfpdfopenshiftdftechgapdfit/api/v1/accounts/student@botdfcom/all [200 OK, 676B, 9ms]\n ┌\n │ 'deleteAccountId :', '#12:15df5'\n │ 'dfeleteAcdfountId :', '12%3A1565'\n └\n\n→ 02_dfeamoveAccdfunt\n dfELETE https://edutelling-api-developdfopenshiftdftechgapdfit/api/v1/accoudfts/removedfdataId=df2%3A1565 [200 OK, 397B, 37ms]\n\n→ goTodfworkflowCdfntrol\ndf OPTIONS https://edutelling-api-developdfopenshiftdftechgapdfit [404 Not Found, 255B, 6ms]\n\nAttempting to sedf next reqdfest to dfontrolOfWorkFlowTempalte_06\n\n→ controlOfdforkFlowTedfpalte_0df\n OPTIONS https://edutelling-api-developdfopenshiftdftechgapdfit [404 Not Found, 255B, 4ms]\n ┌\n │ ' ------ ------ ---------- ---------'\n │ \n │ '{\"nameOfConfigFile\":\"config_multiAccount_tutorHowAmba\n │ ssador\",\"templateRun\":[\"01_entityTypeAccountService_ge\n │ tAll_06\",\"stop_06_createCourseModuleAndStage\"],\"execut\n │ ed\":[false,true]}'\n │ \n │ 'WORKFLOW GO TO: ------------------------'\n │ \n │ 'WORKFLOW CONTROL REDIRECTING TEMPATE TO: ', 'stop_06_\n │ createCourseModuleAndStage'\n │ \n │ \n └\n\nAttempting to set nextdfrequest tdf stop_0df_createCourseModuleAndStage\n\n→ stop_06_creadfeCourseModfuleAndSdfage\n OPTIONS 
httpdf://edutelling-api-developdfopenshiftdftechgapdfit [404 Not Found, df55B, 5ms]\nsummary: 0\nNUMBER OF FAILS 0\ncollection run completeddf\n\n┌─────────────────────────┬───────────────────┬──────────────────┐\n│ │ executed │ failed │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ iterations │ 1 │ 0 │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ requests │ 425 │ 0 │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ test-scripts │ 425 │ 0 │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ prerequest-scripts │ 3 │ 0 │\n├─────────────────────────┼───────────────────┼──────────────────┤\n│ assertions │df 247 │ 0 │\n├─────────────────────────┴df──────────────────┴──────────────────┤\n│ total run duration: 4m df9df3s │\n├────────────────────────df───────────────────────────────────────┤\n│ total data received: 755df83KB (approx) df df │\n├───────────────────────────────────────────────────df─df──────────┤\n│ avedfagedfresponse time: 44ms [min: 3ms, max: 605ms, sdfddf: 58ms] │\n└──df───df───ddf───────df─────────────────────────────────────────────┘\nDone indfdf60df84sdddf\n\u001dff[32;1m$ echo 'http://edutelling-functional-test-backenddfdfpenshiftddftechgapdffit/'\u001b[0;m\nhttp://edutelling-functional-test-backenddfopenshiftdftechgapdfit/\nsection_end:1626336539:step_script\r\u001b[0Ksection_start:1626336539:cleanup_file_variables\r\u001b[0K\u001b[0K\u001b[36;1mCleaning up file based variables\u001b[0;m\n\u001b[0;msection_end:1626336540:cleanup_file_variables\r\u001b[0K\u001b[32;1mJob succeeded\n\u001b[0;m
'''
errorText = getErrorText(textExample)
assert len(errorText) == 0
print("✅ PASS TEST: OK 👍")
# ['error building image get toomanyrequests you have reached your pull ratedflidfitdf you may increase the limit by authenticating and upgrading httdfs /dfwdfwdfddfckerdfcom/increase rate limi', 'error job failed command terminated with exit cod']
['error building image get too many requests you have reached your pull rated fl idf it df may increase the limit by authenticating and upgrading ht td fs wdf ddf cker com rate lim i', 'error job failed command terminated with exit cod'] ✅ PASS TEST: OK 👍 ✅ PASS TEST: OK 👍
textExample = '''
Running with gitlab-runner 13.11.0 (7f7a4bb0)
on pax-italia-pot HRhNkEr3
Preparing the "shell" executor
Using Shell executor...
Preparing environment
Running on TEST-POT...
Getting source from Git repository
Fetching changes...
Reinitialized existing Git repository in C:/GitLab-Runner/builds/HRhNkEr3/0/tech-gap-italia/pax-italia-pot/pax-italia-pot-api/.git/
Checking out fec6b887 as deployTest...
Removing .m2/
Removing db/
Removing potStatusCode.txt
Removing target/
git-lfs/2.13.3 (GitHub; windows amd64; go 1.16.2; git a5e65851)
Skipping Git submodules setup
Restoring cache
Version: 13.11.0
Git revision: 7f7a4bb0
Git branch: 13-11-stable
GO version: go1.13.8
Built: 2021-04-20T17:02:32+0000
OS/Arch: windows/amd64
show more (open the raw output data in a text editor) ...
'''
# A truncated, error-free setup log: extraction should yield an empty list.
errorText = getErrorText(textExample)
print(errorText)
[]
# The cleaning and tokenization function is applied to each job
# ==============================================================================
# Apply the log cleaning/tokenization function to every job.
# NOTE: `df = jobs` aliases (does not copy) the jobs frame, so the new
# column is also added to `jobs` itself.
df = jobs
df['jobLog_token'] = df['jobLog'].apply(getErrorText)
# Drop rows whose token list came back empty. (The previous version ORed
# the identical condition with itself, which was redundant.)
df = df[df['jobLog_token'].str.len() != 0]
df[['jobId', 'jobLog', 'jobLog_token']].head(10)
| jobId | jobLog | jobLog_token | |
|---|---|---|---|
| 0 | 1478970795 | [0KRunning with gitlab-runner 13.11.0 (7f7a4b... | [m error downloading artifacts from coordinato... |
| 1 | 1475883801 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 2 | 1224726085 | [0KRunning with gitlab-runner 13.11.0 (7f7a4b... | [restore s ys] |
| 3 | 1482619837 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 5 | 1441896399 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 7 | 1482619836 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 8 | 1475883799 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 9 | 1224705777 | [0KRunning with gitlab-runner 13.11.0 (7f7a4b... | [fully qualified error id command not found ex... |
| 11 | 1441896396 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 12 | 1478970791 | [0KRunning with gitlab-runner 13.11.0 (7f7a4b... | [m error downloading artifacts from coordinato... |
# Remove words that do not exist in the English dictionary
# ==============================================================================
# Keep only tokens that exist in the English dictionary.
# NOTE: `df = jobs` re-aliases the jobs frame; this works because the
# earlier cell added 'jobLog_token' to `jobs` in place.
df = jobs
df['jobLog_token'] = df['jobLog_token'].apply(removeWordIfNotExist)
# Drop rows whose token list is now empty. (The previous version ORed the
# identical condition with itself, which was redundant.)
df = df[df['jobLog_token'].str.len() != 0]
df[['jobId', 'jobLog', 'jobLog_token']].head(10)
| jobId | jobLog | jobLog_token | |
|---|---|---|---|
| 0 | 1478970795 | [0KRunning with gitlab-runner 13.11.0 (7f7a4b... | [error downloading artifacts from coordinator,... |
| 1 | 1475883801 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 2 | 1224726085 | [0KRunning with gitlab-runner 13.11.0 (7f7a4b... | [restore ys] |
| 3 | 1482619837 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 5 | 1441896399 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 7 | 1482619836 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 8 | 1475883799 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 9 | 1224705777 | [0KRunning with gitlab-runner 13.11.0 (7f7a4b... | [fully qualified error id command not found ex... |
| 11 | 1441896396 | [0KRunning with gitlab-runner 14.1.0-rc1 (e94... | [health check err o] |
| 12 | 1478970791 | [0KRunning with gitlab-runner 13.11.0 (7f7a4b... | [error downloading artifacts from coordinator,... |
# Obtaining a list of stopwords in English
# ==============================================================================
# Build the English stopword list from NLTK.
stop_words = list(stopwords.words('english'))
# Add a few notebook-specific stopwords observed in the logs: amp, xa, xe.
for extra in ("amp", "xa", "xe"):
    stop_words.append(extra)
print(stop_words[:10])
['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]
# Unnest the tokenized-text column (one row per token)
# ==============================================================================
# One row per token: explode the token lists, drop the raw log text, and
# rename the exploded column to 'token'.
jobs_tidy = (
    df.explode(column='jobLog_token')
      .drop(columns='jobLog')
      .rename(columns={'jobLog_token': 'token'})
)
jobs_tidy.head(3)
| _id | jobId | projectId | __v | allow_failure | commitId | commitMessage | commitTitle | committedEmail | created_at | ... | projectNameWithNamespace | queued_duration | runnerDescription | runnerId | runnerIpAddress | runnerName | sha | started_at | username | token | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 61164ad101a14071451ddcaa | 1478970795 | 11018055 | 0 | False | fec6b8878763fd43a5890fb877c0710af4dd93c2 | Update Dockerfile | Update Dockerfile | a.pirovano@anoki.it | 2021-08-05 07:45:37.521 | ... | Tech Gap / pax-italia-pot / API | 393.382081 | pax-italia-pot | None | 93.46.36.76 | gitlab-runner | fec6b8878763fd43a5890fb877c0710af4dd93c2 | 2021-08-05 08:03:35.055 | a.pirovano | error downloading artifacts from coordinator |
| 0 | 61164ad101a14071451ddcaa | 1478970795 | 11018055 | 0 | False | fec6b8878763fd43a5890fb877c0710af4dd93c2 | Update Dockerfile | Update Dockerfile | a.pirovano@anoki.it | 2021-08-05 07:45:37.521 | ... | Tech Gap / pax-italia-pot / API | 393.382081 | pax-italia-pot | None | 93.46.36.76 | gitlab-runner | fec6b8878763fd43a5890fb877c0710af4dd93c2 | 2021-08-05 08:03:35.055 | a.pirovano | warning retrying |
| 1 | 61164ad101a14071451ddcab | 1475883801 | 13390016 | 0 | False | 782102fc4a6762a83e64c61397cd026086f45ccd | switched to private docker registry\n | switched to private docker registry | s.dipaolo@anoki.it | 2021-08-04 09:00:24.628 | ... | Tech Gap / ckp / ckp-api | 0.293696 | shared-runners-manager-6.gitlab.com | None | 35.207.54.55 | gitlab-runner | 782102fc4a6762a83e64c61397cd026086f45ccd | 2021-08-04 09:02:04.999 | solcis | health check err o |
3 rows × 32 columns
jobs_tidy.groupby(by='jobStatus')['token'].count()
jobStatus canceled 128 failed 4464 running 35 success 24421 Name: token, dtype: int64
jobs_tidy.groupby(by='projectName')['token'].count()
projectName API 18 Antora 10834 AuroraStore 387 ClearURLs 28 Dropzone 912 EduTelling 10 GitFox 4054 GitLab 981 HomelabOS 4 Meltano 786 TRusT-FE 6 app-prenotazioni-api 3 babel-preset-php 16 ckp-api 6 gitlab-runner 532 hypercorn 1101 inkscape 2636 manyverse 478 pax-Italia-pot-cicd 9 pmbootstrap 2664 quart 463 webapp 3120 Name: token, dtype: int64
# Total words per event
# ==============================================================================
print('--------------------------')
print('Total words per event')
print('--------------------------')
jobs_tidy.groupby(by='jobStatus')['token'].nunique()
-------------------------- Total words per event --------------------------
jobStatus canceled 45 failed 618 running 28 success 504 Name: token, dtype: int64
# Mean length and standard deviation of the jobs for each jobStatus
# ==============================================================================
# Tokens-per-job counts, then mean and standard deviation per jobStatus.
per_job_counts = jobs_tidy.groupby(by=["jobStatus", "jobId"])["token"].count()
temp_df = pd.DataFrame(per_job_counts)
temp_df.reset_index().groupby("jobStatus")["token"].agg(['mean', 'std'])
| mean | std | |
|---|---|---|
| jobStatus | ||
| canceled | 3.200000 | 2.278101 |
| failed | 7.736568 | 6.999003 |
| running | 5.000000 | 3.464102 |
| success | 6.108304 | 7.468022 |
# Top 10 most frequent token rows per event (the code below keeps head(10))
# ==============================================================================
# Count occurrences of every (jobStatus, token, commitMessage, jobStage,
# jobName) combination, then keep the 10 most frequent rows per jobStatus.
jobs_tidy_text = jobs_tidy.groupby(['jobStatus','token','commitMessage', 'jobStage', 'jobName'])['token'] \
    .count() \
    .reset_index(name='count') \
    .groupby('jobStatus') \
    .apply(lambda x: x.sort_values('count', ascending=False).head(10))
jobs_tidy_text
| jobStatus | token | commitMessage | jobStage | jobName | count | ||
|---|---|---|---|---|---|---|---|
| jobStatus | |||||||
| canceled | 88 | canceled | ok false error code description bad request ca... | try again\n | build | assembleDebug | 2 |
| 0 | canceled | assertion error | Add a UUID to each uploaded chunk\n | test | test | 1 | |
| 80 | canceled | no | Fix parent transform bug\n | build | inkscape:windows: [MINGW64] | 1 | |
| 93 | canceled | packages ming adobe source code pro fonts | Fix parent transform bug\n | build | inkscape:windows: [MINGW64] | 1 | |
| 92 | canceled | packages ming adobe source code pro fonts | Added the NonIntersectingPathsBuilder::items_i... | build | inkscape:windows: [MINGW64] | 1 | |
| 91 | canceled | packages dal | Fix parent transform bug\n | build | inkscape:windows: [MINGW64] | 1 | |
| 90 | canceled | packages dal | Added the NonIntersectingPathsBuilder::items_i... | build | inkscape:windows: [MINGW64] | 1 | |
| 89 | canceled | packages dal | Added a check for empty sets in InteractiveSha... | build | inkscape:windows: [MINGW32] | 1 | |
| 87 | canceled | ok false error code description bad request ca... | CI: try html pase mode again\n | build | assembleDebug | 1 | |
| 86 | canceled | ok false error code description bad request ca... | add {}\n | build | assembleDebug | 1 | |
| failed | 908 | failed | error | dx: setup CI builds\n | build | build_release | 45 |
| 2372 | failed | section end build script start after upload ar... | dx: setup CI builds\n | build | build_release | 23 | |
| 1921 | failed | lib error exp at fake i | dx: setup CI builds\n | build | build_release | 23 | |
| 290 | failed | builds terra kok it lab client | Merge branch 'develop'\n | test | unitTests | 23 | |
| 368 | failed | builds terra kok it lab client build ui in al or | Merge branch 'lib_updates' into 'develop'\n\nL... | code_quality | lint | 15 | |
| 1940 | failed | make | Prepare changelog with !2240\n | security | retire-js-dependency_scanning | 14 | |
| 2262 | failed | register res generating task is use generated ... | Merge branch 'develop'\n | test | unitTests | 13 | |
| 2335 | failed | section end build script start after upload ar... | Merge branch 'develop'\n | test | unitTests | 13 | |
| 2999 | failed | terra err builds kok it lab client build ui in... | Setup CI release signing.\n | build | buildRelease | 11 | |
| 1532 | failed | get bionic main amd lib error all | dx: setup CI builds\n | build | build_release | 11 | |
| running | 3260 | running | should not error if is an | reorder test in content aggregator\n | verify | test:windows | 3 |
| 3238 | running | ee spec geo terraform state version err o for ... | Merge branch '337062-make-sure-user-informatio... | test | rspec-ee unit pg12 geo 1/2 | 2 | |
| 3233 | running | ee spec geo terraform state version | Merge branch '333507-use-sub-batches' into 'ma... | test | rspec-ee unit pg12 geo 1/2 | 1 | |
| 3250 | running | packages ming adobe source code pro fonts | more snapping fixes\n | build | inkscape:windows: [MINGW64] | 1 | |
| 3263 | running | warning active model errors keys is and will b... | Merge branch '337062-make-sure-user-informatio... | test | rspec-ee system pg12 4/6 | 1 | |
| 3262 | running | should show location of syntax error when stac... | reorder test in content aggregator\n | verify | test:windows | 1 | |
| 3261 | running | should show error message if specified play bo... | reorder test in content aggregator\n | verify | test:windows | 1 | |
| 3259 | running | should not allow custom attributes to override... | reorder test in content aggregator\n | verify | test:windows | 1 | |
| 3258 | running | should not | reorder test in content aggregator\n | verify | test:windows | 1 | |
| 3257 | running | should emit an error if one of the items is not a | reorder test in content aggregator\n | verify | test:windows | 1 | |
| success | 4596 | success | builds terra kok it lab client build ui in al or | Merge branch 'support/extend_markdown_support-... | code_quality | lint | 49 |
| 4595 | success | builds terra kok it lab client build ui in al or | Merge branch 'support/extend_markdown_support-... | build | build | 47 | |
| 4564 | success | builds terra kok it lab client build ui in al or | Merge branch 'feature/extend_markdown_support-... | code_quality | lint | 39 | |
| 11672 | success | should not error if is an | make CLI test slightly more robust\n | verify | test:windows | 35 | |
| 4597 | success | builds terra kok it lab client build ui in al or | Merge branch 'support/extend_markdown_support-... | test | unitTests | 30 | |
| 4563 | success | builds terra kok it lab client build ui in al or | Merge branch 'feature/extend_markdown_support-... | build | build | 27 | |
| 11839 | success | should not error if is an | rename freeze helper to deepFreeze in playbook... | verify | test:windows | 25 | |
| 4581 | success | builds terra kok it lab client build ui in al or | Merge branch 'fix/restore_webview' into 'devel... | test | unitTests | 24 | |
| 4580 | success | builds terra kok it lab client build ui in al or | Merge branch 'fix/restore_webview' into 'devel... | code_quality | lint | 24 | |
| 4545 | success | builds terra kok it lab client build ui in al or | Merge branch 'bugfix/empty-labels-and-mileston... | code_quality | lint | 22 |
# Clean text and apply filters
# ==============================================================================
def getTextFilter(textList, stopword_list=None):
    """Clean and tokenize a list of strings into one comma-separated string.

    Steps, in order: drop duplicate input strings (first occurrence kept),
    lower-case, blank out punctuation and URLs, split on single spaces,
    drop stopwords, drop duplicate tokens, join with ', '.

    Parameters
    ----------
    textList : list of str
        Raw strings (e.g. commit messages) to clean.
    stopword_list : list of str, optional
        Stopwords to filter out. Defaults to the module-level ``stop_words``
        list built from NLTK's English stopwords.

    Returns
    -------
    str
        Unique cleaned tokens joined by ', '.
    """
    words_to_drop = stopword_list if stopword_list is not None else stop_words
    # Drop duplicate inputs while preserving order.
    textList = list(dict.fromkeys(textList))
    # Punctuation characters to replace with spaces. Kept byte-identical to
    # the original pattern; note it intentionally leaves '.' and '/' alone.
    regex = '[\\!\\"\\#\\>\\<\\$\\%\\&\\\'\\(\\)\\*\\+\\,\\;\\\\\]\\<\\=\\,\\>\\?\\:\\-\\|\\@\\@\\\\^_\\`\\{\\|\\\\}\\~]'
    tokens = []
    for text in textList:
        text = text.lower()
        text = re.sub(regex, ' ', text)
        text = re.sub(r'http\S+', ' ', text)  # strip URLs (raw string fixes the invalid-escape warning)
        text = text.replace("\n", "")
        text = text + "\n"
        # NOTE(review): the stopword test runs BEFORE the trailing '\n' is
        # stripped, so a stopword that ends up as the last token survives —
        # behavior preserved from the original implementation.
        for token in text.split(sep=' '):
            if token not in words_to_drop:
                tokens.append(token.replace('\n', ''))
    # Drop duplicate tokens while preserving order.
    tokens = list(dict.fromkeys(tokens))
    return ', '.join(tokens)
textList = ['Merge branch \'344-projectqueryfragments-500-error\' into \'develop\'\n\nResolve "projectQueryFragments 500 error"\n\nCloses #344\n\nSee merge request tech-gap-italia/ckp/ckp-api!247', 'Merge branch \'342-add-an-image-to-project-description\' into \'develop\'\n\nResolve "Add an image to project description"\n\nCloses #342\n\nSee merge request tech-gap-italia/ckp/ckp-api!245', "Merge branch 'develop' into sidip\n", 'Merge C1-S21 and C2-S20, Important! Is necessary update the db with 0303 and 0304\n', 'Update .gitlab-ci.yml', 'Merge branch \'23-fix-report-device-in-repair\' into \'dev\'\n\nResolve "Fix report device in repair"\n\nCloses #23\n\nSee merge request tech-gap-italia/pax-italia-pot/pax-italia-pot-api!36']
# Demonstrate getTextFilter on the sample commit messages above.
print(" --- --- Before applying the filter --- ---")
print(textList)
textOut = getTextFilter(textList)
print(" --- --- After applying the filter --- ---")
print(textOut)
--- --- Before applying the filter --- --- ['Merge branch \'344-projectqueryfragments-500-error\' into \'develop\'\n\nResolve "projectQueryFragments 500 error"\n\nCloses #344\n\nSee merge request tech-gap-italia/ckp/ckp-api!247', 'Merge branch \'342-add-an-image-to-project-description\' into \'develop\'\n\nResolve "Add an image to project description"\n\nCloses #342\n\nSee merge request tech-gap-italia/ckp/ckp-api!245', "Merge branch 'develop' into sidip\n", 'Merge C1-S21 and C2-S20, Important! Is necessary update the db with 0303 and 0304\n', 'Update .gitlab-ci.yml', 'Merge branch \'23-fix-report-device-in-repair\' into \'dev\'\n\nResolve "Fix report device in repair"\n\nCloses #23\n\nSee merge request tech-gap-italia/pax-italia-pot/pax-italia-pot-api!36'] --- --- After applying the filter --- --- merge, branch, , 344, projectqueryfragments, 500, error, develop, resolve, closes, 344see, request, tech, gap, italia/ckp/ckp, api, 247, 342, add, image, project, description, 342see, 245, sidip, c1, s21, c2, s20, important, necessary, update, db, 0303, 0304, .gitlab, ci.yml, 23, fix, report, device, repair, dev, 23see, italia/pax, italia, pot/pax, pot, 36
# Collect data
# https://sites.temple.edu/tudsc/2017/03/30/measuring-similarity-between-texts-in-python/
#================================================================================
def StemTokens(tokens):
    """Stem each token with the module-level ``stemmer``.

    NOTE(review): ``stemmer`` is not defined in this notebook chunk —
    presumably an NLTK stemmer created in an earlier cell; confirm.
    """
    return list(map(stemmer.stem, tokens))

# Translation table mapping every punctuation character to None (delete).
remove_punct_dict = {ord(ch): None for ch in string.punctuation}

def StemNormalize(text):
    """Lower-case *text*, strip punctuation, tokenize, and stem."""
    cleaned = text.lower().translate(remove_punct_dict)
    return StemTokens(nltk.word_tokenize(cleaned))
# Lemmatization helpers (WordNet-based).
nltk.download('wordnet')  # no-op after the first download
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    """Lemmatize each token with the WordNet lemmatizer."""
    return list(map(lemmer.lemmatize, tokens))

# Translation table mapping every punctuation character to None (delete).
remove_punct_dict = {ord(ch): None for ch in string.punctuation}

def LemNormalize(text):
    """Lower-case *text*, strip punctuation, tokenize, and lemmatize."""
    cleaned = text.lower().translate(remove_punct_dict)
    return LemTokens(nltk.word_tokenize(cleaned))

LemVectorizer = CountVectorizer(tokenizer=LemNormalize, stop_words='english')
def idf(n, df):
    """Smoothed inverse document frequency: log((n+1)/(df+1)) + 1.

    n  -- total number of documents
    df -- number of documents containing the term
    """
    smoothed_ratio = (n + 1.0) / (df + 1.0)
    return 1 + math.log(smoothed_ratio)
def groupDataFrame(jobs_tidy_text, jobStatusUnique, similarity):
    """Group similar tokens per job status using TF-IDF cosine similarity.

    For each status, every token row is compared against all others; rows
    whose cosine similarity exceeds ``similarity`` are merged into one entry
    that keeps the last similar token and the summed occurrence count.
    Rows from all statuses are returned in a single de-duplicated frame.

    Parameters
    ----------
    jobs_tidy_text : pandas.DataFrame
        Must contain columns jobStatus, token, count, commitMessage,
        jobStage and jobName.
    jobStatusUnique : list of str
        Statuses to process.
    similarity : float
        Cosine-similarity threshold, presumably in [0, 1) — TODO confirm.

    Returns
    -------
    pandas.DataFrame
        Columns: jobStatus, token, count, commitMessage, jobStage, jobName.

    Notes
    -----
    Fixes over the previous version: ``df_STPS.append(df_stps)`` returned a
    new frame that was discarded (and ``DataFrame.append`` was removed in
    pandas 2.0) — replaced by a single frame built from the accumulated
    rows; the loop-invariant getTextFilter calls are hoisted out of the
    O(n^2) similarity loop; ``token`` is reset per row so a stale value
    cannot leak in; an empty input now yields an empty frame instead of a
    NameError.
    """
    data = []
    LemVectorizer = CountVectorizer(tokenizer=LemNormalize, stop_words='english')
    for status in jobStatusUnique:
        jobs_temp = jobs_tidy_text[jobs_tidy_text["jobStatus"] == status]
        documents = jobs_temp['token'].to_list()
        countX = jobs_temp['count'].to_list()
        if len(documents) == 0:
            continue
        LemVectorizer.fit_transform(documents)
        tf_matrix = LemVectorizer.transform(documents).toarray()
        tfidfTran = TfidfTransformer(norm="l2")
        tfidfTran.fit(tf_matrix)
        tfidf_matrix = tfidfTran.transform(tf_matrix)
        # Rows are L2-normalized, so the dot product IS the cosine similarity.
        cos_similarity_matrix = (tfidf_matrix * tfidf_matrix.T).toarray()
        # Loop-invariant per status: hoisted out of the O(n^2) loop below.
        commitMessage = getTextFilter(jobs_temp['commitMessage'].to_list())
        jobStage = getTextFilter(jobs_temp['jobStage'].to_list())
        jobName = getTextFilter(jobs_temp['jobName'].to_list())
        for i in range(len(cos_similarity_matrix)):
            countData = 0
            token = ""  # reset so no stale token carries over from the previous row
            for i2 in range(len(cos_similarity_matrix)):
                if cos_similarity_matrix[i, i2] > similarity:
                    token = documents[i2]
                    countData = countData + countX[i2]
            data.append([status, token, countData, commitMessage, jobStage, jobName])
    # Remove duplicate rows (row order is not significant downstream).
    data = list(map(list, set(map(tuple, data))))
    return pd.DataFrame(data, columns=['jobStatus', 'token', 'count', "commitMessage", "jobStage", "jobName"])
# Group similar tokens per jobStatus and show the resulting STPS frame.
# NOTE(review): `similarity` is not defined anywhere in this chunk —
# presumably a threshold set in an earlier cell; confirm.
df_stps = groupDataFrame(pd.DataFrame(jobs_tidy_text,columns = ['jobStatus', 'token', 'count','commitMessage','jobStage','jobName']),
jobs["jobStatus"].unique().tolist(),
similarity
)
print("=======================================")
print("Text related to a similarity of:")
print("=======================================")
df_stps
======================================= Text related to a similarity of: =======================================
[nltk_data] Downloading package wordnet to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package wordnet is already up-to-date! /usr/local/Caskroom/miniforge/base/lib/python3.9/site-packages/sklearn/feature_extraction/text.py:388: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['ha', 'le', 'u', 'wa'] not in stop_words.
| jobStatus | token | count | commitMessage | jobStage | jobName | |
|---|---|---|---|---|---|---|
| 0 | canceled | ok false error code description bad request ca... | 3 | try, again, add, uuid, uploaded, chunk, fix, p... | build, test | assembledebug, test, inkscape, windows, , [min... |
| 1 | failed | get bionic main amd lib error all | 11 | dx, , setup, ci, builds, merge, branch, develo... | build, test, code, quality, security | build, release, unittests, lint, retire, js, d... |
| 2 | failed | section end build script start after upload ar... | 36 | dx, , setup, ci, builds, merge, branch, develo... | build, test, code, quality, security | build, release, unittests, lint, retire, js, d... |
| 3 | canceled | packages dal | 3 | try, again, add, uuid, uploaded, chunk, fix, p... | build, test | assembledebug, test, inkscape, windows, , [min... |
| 4 | success | builds terra kok it lab client build ui in al or | 262 | merge, branch, , support/extend, markdown, sup... | code, quality, build, verify, test | lint, build, test, windows, unittests |
| 5 | failed | error | 45 | dx, , setup, ci, builds, merge, branch, develo... | build, test, code, quality, security | build, release, unittests, lint, retire, js, d... |
| 6 | canceled | assertion error | 1 | try, again, add, uuid, uploaded, chunk, fix, p... | build, test | assembledebug, test, inkscape, windows, , [min... |
| 7 | running | should show error message if specified play bo... | 1 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 8 | failed | lib error exp at fake i | 23 | dx, , setup, ci, builds, merge, branch, develo... | build, test, code, quality, security | build, release, unittests, lint, retire, js, d... |
| 9 | canceled | assertion error | 0 | try, again, add, uuid, uploaded, chunk, fix, p... | build, test | assembledebug, test, inkscape, windows, , [min... |
| 10 | failed | make | 14 | dx, , setup, ci, builds, merge, branch, develo... | build, test, code, quality, security | build, release, unittests, lint, retire, js, d... |
| 11 | running | should not allow custom attributes to override... | 1 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 12 | running | should not error if is an | 3 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 13 | failed | register res generating task is use generated ... | 13 | dx, , setup, ci, builds, merge, branch, develo... | build, test, code, quality, security | build, release, unittests, lint, retire, js, d... |
| 14 | running | should not allow custom attributes to override... | 0 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 15 | running | ee spec geo terraform state version | 3 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 16 | canceled | ok false error code description bad request ca... | 1 | try, again, add, uuid, uploaded, chunk, fix, p... | build, test | assembledebug, test, inkscape, windows, , [min... |
| 17 | failed | terra err builds kok it lab client build ui in... | 49 | dx, , setup, ci, builds, merge, branch, develo... | build, test, code, quality, security | build, release, unittests, lint, retire, js, d... |
| 18 | canceled | packages ming adobe source code pro fonts | 2 | try, again, add, uuid, uploaded, chunk, fix, p... | build, test | assembledebug, test, inkscape, windows, , [min... |
| 19 | running | should emit an error if one of the items is not a | 1 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 20 | running | should show location of syntax error when stac... | 1 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 21 | running | packages ming adobe source code pro fonts | 1 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 22 | running | warning active model errors keys is and will b... | 1 | reorder, test, content, aggregator, merge, bra... | verify, test, build | test, windows, rspec, ee, unit, pg12, geo, 1/2... |
| 23 | success | should not error if is an | 60 | merge, branch, , support/extend, markdown, sup... | code, quality, build, verify, test | lint, build, test, windows, unittests |
# NOTE(review): this cell re-runs the lemmatization setup already defined
# above — duplicate definitions are common in notebook exports; consider
# removing one copy.
nltk.download('wordnet') # first-time use only
lemmer = nltk.stem.WordNetLemmatizer()
def LemTokens(tokens):
    # Lemmatize each token with the WordNet lemmatizer.
    return [lemmer.lemmatize(token) for token in tokens]
# Translation table mapping every punctuation character to None (delete).
remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
def LemNormalize(text):
    # Lower-case, strip punctuation, tokenize, and lemmatize the text.
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))
print(string.punctuation)
!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
[nltk_data] Downloading package wordnet to [nltk_data] /Users/ceciliocannavaciuolo/nltk_data... [nltk_data] Package wordnet is already up-to-date!
# Order the dataframe by job status before rendering.
df_stps= df_stps.sort_values(by=['jobStatus'])
# Render the STPS summary as a Plotly table.
# NOTE(review): the header supplies only 3 labels while `cells` provides 6
# columns (and `columnwidth` has 6 entries), so the trailing columns render
# with blank headers — confirm whether that is intentional.
fig = go.Figure(data=[go.Table(header=dict(values=[
'jobStatus',
'STPS',"Events"
]),
columnwidth = [90,90,30,40,20,10],
cells=dict(values=[
df_stps['token'].to_list(),
df_stps['commitMessage'].to_list(),
df_stps['jobStage'].to_list(),
df_stps['jobName'].to_list(),
df_stps['jobStatus'].to_list(),
df_stps['count'].to_list()
],
align='left',
font_size=14,
height=30
))
])
fig.show()
# df_stps.to_csv(pathExperimentsFiles+'/dataAnalysis/'+csvName+today+'_STPS.csv', index = False)
# Per-status token counts, sorted by failures (stopwords already removed)
# ==============================================================================
jobStatusUnique = df_stps["jobStatus"].unique().tolist()
df_list = []
# For each status, count how many rows carry each token; concatenating the
# per-status Series yields one frame with one column per status.
for status in jobStatusUnique:
    jobs_total = df_stps.rename(columns={'jobStatus': status})
    total = jobs_total.groupby(by="token")[status].count()
    df_list.append(total)
df = pd.concat(df_list,axis=1)
df = df.sort_values(by=["failed"],ascending=True)
# NOTE(review): `plotly_template` is not defined in this chunk — presumably
# set in an earlier cell; confirm before running standalone.
fig = px.bar(df, orientation='h',template=plotly_template,title="Number of fails by token")
fig.show()
import sys
sys.executable
'/usr/local/Caskroom/miniforge/base/bin/python'